library(pacman)
p_load(tidyverse, lubridate, ggplot2, plotly, viridis, gplots)
# Read the gzipped arrests extract. Every column type is declared explicitly;
# arrest_date is read as text here and converted to a Date just below.
# NOTE(review): col_factor() without explicit levels orders the levels by
# first appearance in the file, so the year/month/day axis order in the plots
# follows the file's row order -- confirm the input is sorted as intended.
arrests <- read_delim(
  here::here("analyze", "input", "arrests_with_state.csv.gz"),
  delim = ",",
  col_types = cols(
    arrest_date = col_character(),
    date_of_birth = col_factor(),
    gender = col_factor(),
    country_of_citizenship = col_factor(),
    event_number = col_factor(),
    apprehension_landmark = col_character(),
    arrest_method = col_factor(),
    most_serious_conviction = col_factor(),
    year = col_factor(),
    month = col_factor(),
    day = col_factor(),
    state = col_factor()
  )
)

# Column-name normalisation is disabled; names are used as they appear in the
# input file.
# names(arrests) <- tolower(names(arrests))
# names(arrests) <- str_replace_all(names(arrests), "\\s", "_")

# Convert the month/day/year text into Date values.
arrests[["arrest_date"]] <- mdy(arrests[["arrest_date"]])

# year, month and day are read from the file (see col_types above) rather than
# derived from arrest_date; the derivation is kept here, disabled.
# arrests$year <- year(arrests$arrest_date)
# arrests$month <- month(arrests$arrest_date)
# arrests$day <- day(arrests$arrest_date)

# Abbreviated day-of-week label for each arrest, stored as a factor.
arrests[["wday"]] <- as.factor(
  wday(arrests[["arrest_date"]], label = TRUE, abbr = TRUE)
)

# Remove the two columns that are not used in the analysis below.
arrests <- dplyr::select(arrests, -c(event_number, date_of_birth))
Arrest counts by state: absolute arrest totals higher in WA; both states show overall decline in arrests but note WA increase in 2017-2018 which is not mirrored in OR.
# Yearly arrest counts for OR and WA, one facet per state, with the count
# printed above each bar.
# geom_bar()/geom_text(stat = "count") do the counting themselves, so no
# dplyr grouping is needed before ggplot().
p1 <- arrests %>%
  filter(state %in% c("OR", "WA")) %>%
  ggplot(aes(x = year, fill = state)) +
  geom_bar() +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    vjust = -1.5,
    color = "black"
  ) +
  # Anchor the axis at zero and let the upper limit follow the data, with
  # extra headroom so the labels above the tallest bars stay inside the panel.
  # The previous fixed ylim(0, 5000) would have removed (with only a warning)
  # any bar whose count exceeded 5000.
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0.05, 0.15))
  ) +
  facet_wrap(~state) +
  scale_fill_viridis_d() +
  theme_minimal()
p1
OR arrests by month; no notable seasonal trends:
# Monthly arrest totals in OR, drawn as one line per year.
p2 <- arrests %>%
  filter(state == "OR") %>%
  # count() returns an ungrouped table, so no leftover grouping by `year`
  # reaches ggplot() and no "grouped output" message is emitted.
  count(year, month, name = "total_arrests") %>%
  ggplot(aes(x = month, y = total_arrests, color = year, group = year)) +
  geom_line() +
  # Start the y axis at zero; the upper limit follows the data.
  ylim(0, NA) +
  scale_color_viridis_d() +
  theme_minimal()
p2
OR arrests by day of week; note that most arrests occur on weekdays.
# Stacked bar chart of OR arrests for each day of the week, split by year.
# NOTE(review): the group_by() below does not appear to influence the bars
# (geom_bar() counts rows per x/fill itself); kept so the plot data is unchanged.
p3 <- ggplot(
  data = group_by(filter(arrests, state == "OR"), year, wday),
  mapping = aes(x = wday, group = year, color = year, fill = year)
) +
  geom_bar() +
  # y axis starts at zero; upper limit is left to the data.
  lims(y = c(0, NA)) +
  scale_fill_viridis_d() +
  scale_color_viridis_d()
p3
Arrest method by state. Note OR shift from “CAP Local Incarceration” to “Non-Custodial Arrest” starting in 2015, not mirrored in WA.
# Stacked yearly arrest counts by arrest method, with OR and WA side by side.
# NOTE(review): the group_by() below does not appear to influence the bars
# (geom_bar() counts rows per x/fill itself); kept so the plot data is unchanged.
p1 <- ggplot(
  data = arrests %>%
    filter(state %in% c("OR", "WA")) %>%
    group_by(state, year, arrest_method),
  mapping = aes(x = year, color = arrest_method, fill = arrest_method)
) +
  geom_bar() +
  # Reversed viridis palette for both the bar fill and the bar outline.
  scale_color_viridis_d(direction = -1) +
  scale_fill_viridis_d(direction = -1) +
  lims(y = c(0, NA)) +
  labs(title = "ICE arrest methods by state", x = "", y = "Arrests") +
  facet_grid(cols = vars(state)) +
  # Slant the year labels so they do not overlap.
  theme(
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1, vjust = 1)
  )
# Show the interactive version first, then the static one.
ggplotly(p1)
p1
Arrest methods in OR only as percent of total:
# Share of each arrest method within each year, OR only (bars are normalised
# to 100% by position = "fill").
# arrest_method is used as read from the file (already a factor), and
# geom_bar() does the counting, so no mutate()/group_by() step is needed.
p1 <- arrests %>%
  filter(state == "OR") %>%
  ggplot(aes(x = year, fill = arrest_method, color = arrest_method)) +
  geom_bar(position = "fill") +
  # One y scale carries both the percent labels and the zero lower limit.
  # Adding ylim() after scale_y_continuous() replaces the whole scale and
  # discards the percent labels, which is what happened previously.
  scale_y_continuous(labels = scales::percent, limits = c(0, NA)) +
  scale_fill_viridis_d(direction = -1) +
  scale_color_viridis_d(direction = -1)
ggplotly(p1)
Arrest method by day of week; not very interesting:
# Stacked bar chart of OR arrests for each day of the week, split by arrest
# method.
# NOTE(review): the group_by() below does not appear to influence the bars
# (geom_bar() counts rows per x/fill itself); kept so the plot data is unchanged.
p1 <- ggplot(
  data = arrests %>%
    filter(state == "OR") %>%
    mutate(arrest_method = as.factor(arrest_method)) %>%
    group_by(arrest_method, wday),
  mapping = aes(
    x = wday,
    group = arrest_method,
    color = arrest_method,
    fill = arrest_method
  )
) +
  geom_bar() +
  # y axis starts at zero; upper limit is left to the data.
  lims(y = c(0, NA)) +
  scale_fill_viridis_d() +
  scale_color_viridis_d()
p1
Country of citizenship
# Yearly OR arrest totals, one line per country of citizenship, shown as an
# interactive plot.
p1 <- arrests %>%
  filter(state == "OR") %>%
  # count() returns an ungrouped table, so no leftover grouping by `year`
  # reaches ggplot() and no "grouped output" message is emitted.
  count(year, country_of_citizenship, name = "total") %>%
  ggplot(aes(
    x = year,
    y = total,
    color = country_of_citizenship,
    group = country_of_citizenship
  )) +
  geom_line()
ggplotly(p1)
Note the decreasing proportion of Mexican nationals and the increase in nationals of Guatemala and Honduras.
# The five countries of citizenship with the most OR arrests, largest first.
top <- arrests %>%
  filter(state == "OR") %>%
  count(country_of_citizenship, sort = TRUE) %>%
  head(n = 5)

# Yearly share of OR arrests for those five countries; every other country is
# pooled under the label "ALL OTHERS". Bars are normalised by position = "fill".
# NOTE(review): the group_by() below does not appear to influence the bars
# (geom_bar() counts rows per x/fill itself); kept so the plot data is unchanged.
p1 <- arrests %>%
  filter(state == "OR") %>%
  mutate(
    country = if_else(
      country_of_citizenship %in% top$country_of_citizenship,
      as.character(country_of_citizenship),
      "ALL OTHERS"
    )
  ) %>%
  group_by(year, country) %>%
  ggplot(aes(x = year, group = country, fill = country, color = country)) +
  geom_bar(position = "fill")
ggplotly(p1)